import requests
from bs4 import BeautifulSoup
import os
import lxml
import time
# Request headers (browser User-Agent + Referer so the image host serves the files)
# Root directory on disk where downloaded image groups are saved.
Cata = "F:/mingxing/"
headers ={'User-Agent':"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0",
          'Referer':"http://www.mm131.com/mingxing/1897.html"}

def mkdir(outside_dir_name, path):
    """Create the subdirectory `path` under `outside_dir_name`.

    Returns the full path of the newly created directory, or None when the
    directory already exists (caller uses this to skip already-downloaded
    groups) or when creation fails.
    """
    try:
        path = path.strip()
        inside_dir_name = os.path.join(outside_dir_name, path)
        if os.path.exists(inside_dir_name):
            print('%s文件夹已存在' % (path))
            return None
        print('创建文件夹：%s' % (path))
        os.makedirs(inside_dir_name)
        return inside_dir_name
    except OSError:
        # Narrowed from a bare `except:` — only filesystem errors are
        # expected here; anything else should propagate.
        return None




def get_one_group(url, group_name):
    """Download every image of one photo group.

    `url` is the group's first page; subsequent pages are derived by
    appending `_<n>.html`. Skips the group entirely if its folder
    already exists (mkdir returns None).
    """
    first_resp = requests.get(url, headers=headers)
    first_soup = BeautifulSoup(first_resp.content, "lxml")
    # Second-to-last pager link holds the highest page number.
    last_page = first_soup.find('div', class_='content-page').find_all('a')[-2].text
    target_dir = mkdir(Cata, group_name)
    if not target_dir:
        return
    get_one_page_img(first_soup, target_dir)
    base_url = url.split(".html")[0]
    for page_no in range(2, int(last_page) + 1):
        page_url = "%s_%s.html" % (base_url, page_no)
        page_resp = requests.get(page_url, headers=headers)
        page_soup = BeautifulSoup(page_resp.content, "lxml")
        get_one_page_img(page_soup, target_dir)


def get_one_page_img(soup, dir_name):
    """Download the single image shown on one gallery page.

    `soup` is the parsed page; the image lives inside the
    div.content-pic element. The file name is derived from the image
    URL by download_img.
    """
    img = soup.find('div', class_='content-pic').find('img')
    # Removed unused `img_name = img['alt']` — the saved file is named
    # from the URL, not the alt text.
    download_img(img['src'], dir_name)


def get_all_img(start_url):
    """Walk the index page at `start_url` and download every linked group."""
    index_resp = requests.get(start_url, headers=headers)
    index_soup = BeautifulSoup(index_resp.content, 'lxml')

    listing = index_soup.find("div", class_="main").find("dl", class_="list-left public-box")
    group_links = listing.find_all("a", attrs={"target": "_blank"}, recursive=True)

    for link in group_links:
        print(link['href'])
        print(link.text)
        get_one_group(link['href'], link.text)
        # Throttle requests so we don't hammer the server.
        time.sleep(1)


def download_img(img_url, dir_name):
    """Fetch `img_url` and save it into `dir_name`, named after the URL's
    last path segment."""
    name = os.path.join(dir_name, img_url.split('/')[-1])
    img = requests.get(img_url, headers=headers)
    # 'wb', not 'ab': append mode would tack duplicate bytes onto an
    # existing file on every re-run, corrupting the image.
    with open(name, 'wb') as f:
        f.write(img.content)


if __name__ == "__main__":
    # Guard the entry point: importing this module should not start a crawl.
    get_all_img("http://www.mm131.com/mingxing")